(use-modules
 (utils csv)
 (decision-tree)
 (dataset)
 (tree)
 (utils string)
 (utils display)
 (prediction)
 ;; ice-9 format for format procedure
 (ice-9 format))



;; Relative path of the CSV file containing the banknote authentication data.
(define FILE-PATH "data_banknote_authentication.csv")

;; Every value read from the CSV arrives as a string. COLUMN-CONVERTERS holds
;; one entry per column, in column order; each entry is a list of procedures
;; used to turn that column's string into the data type the program works with.
;;
;; The first four columns are parsed with string->number directly. The fifth
;; column is stripped of surrounding whitespace before being parsed.
(define COLUMN-CONVERTERS
  (let ([plain-number string->number]
        [trimmed-number
         (lambda (val)
           (string->number (string-trim-both val)))])
    (list (list plain-number)
          (list plain-number)
          (list plain-number)
          (list plain-number)
          (list trimmed-number))))

;; The banknote data set, loaded with all-rows from the CSV file named by
;; FILE-PATH. COLUMN-CONVERTERS is passed as the #:converters argument so that
;; the string values from the file are converted column by column.
;; The path is taken from FILE-PATH rather than repeated here, so the file
;; name is defined in exactly one place.
(define banking-dataset
  (all-rows FILE-PATH #:converters COLUMN-CONVERTERS))

;; A leftover from development: ten hand-written rows, each a vector of two
;; numeric values followed by a 0 or a 1. It is small enough to experiment with
;; interactively, or to run things faster than with the whole larger data set.
(define dev-dataset
  (map list->vector
       '((2.771244718 1.784783929 0)
         (1.728571309 1.169761413 0)
         (3.678319846 2.81281357 0)
         (3.961043357 2.61995032 0)
         (2.999208922 2.209014212 0)
         (7.497545867 3.162953546 1)
         (9.00220326 3.339047188 1)
         (7.444542326 0.476683375 1)
         (10.12493903 3.234550982 1)
         (6.642287351 3.319983761 1))))



;; Displays a string representation of a learned decision tree.
;;
;; TREE is the root node to print. LABEL-COLUMN-INDEX is handed to
;; dataset-majority-prediction together with a leaf's node data to obtain the
;; value shown for that leaf.
;;
;; Each node occupies one line, indented by two spaces per level of depth:
;;   - a leaf is rendered as "[<majority prediction>]",
;;   - any other node as "[feature:<split feature index> < <split value>]",
;;     followed by its left subtree and then its right subtree, one level
;;     deeper.
;; The assembled string is written with displayln.
(define-public (print-tree tree label-column-index)
  ;; Whitespace prefix for a node at the given depth.
  (define (indentation depth)
    (n-times-string "  " depth))

  ;; Renders NODE and everything below it as a single string.
  (define (render node depth)
    (if (leaf-node? node)
        (string-append (indentation depth)
                       "["
                       (number->string
                        (dataset-majority-prediction (node-data node)
                                                     label-column-index))
                       "]\n")
        (string-append (indentation depth)
                       "[feature:"
                       (number->string (node-split-feature-index node))
                       " < "
                       (number->string (node-split-value node))
                       "]\n"
                       (render (node-left node) (+ depth 1))
                       (render (node-right node) (+ depth 1)))))

  (displayln (render tree 0)))


;; Runs evaluate-algorithm with 10 folds on the shuffled banknote data set and
;; prints the result. Every number returned is formatted with three decimal
;; places plus a trailing newline; the resulting list of strings is then
;; displayed as a whole, followed by one more newline.
;; NOTE(review): the returned numbers are presumably per-fold scores -- confirm
;; against evaluate-algorithm.
(let* ([shuffled-dataset (shuffle-dataset banking-dataset #:seed 12345)]
       [scores
        (evaluate-algorithm #:dataset shuffled-dataset
                            #:n-folds 10
                            #:feature-column-indices '(0 1 2 3)
                            #:label-column-index 4
                            #:max-depth 6
                            #:min-data-points 12
                            #:min-data-points-ratio 0.02
                            #:min-impurity-split (expt 10 -7)
                            #:stop-at-no-impurity-improvement #t
                            #:random-seed 12345)]
       [formatted-scores
        (map (lambda (score) (format #f "~,3f\n" score))
             scores)])
  (display (simple-format #f "~a\n" formatted-scores)))


;; (define tree
;;   (fit #:train-data (shuffle-dataset banking-dataset #:seed 12345)
;;        #:feature-column-indices (list 0 1 2 3)
;;        #:label-column-index 4
;;        #:max-depth 5
;;        #:min-data-points 12
;;        #:min-data-points-ratio 0.02
;;        #:min-impurity-split (expt 10 -7)
;;        #:stop-at-no-impurity-improvement #t))

;; (print-tree tree 4)
